import requests
from bs4 import BeautifulSoup
import pandas as pd


# HTTP headers sent with every request in this script: a Referer pointing at
# the scraped site and a desktop-browser User-Agent string.
header = {
    "Referer": "https://ssr1.scrape.center/",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
}

# Download one sample poster image and save it as test.jpg.
# The timeout keeps the script from blocking forever on a stalled connection.
response = requests.get(
    "https://p0.meituan.net/movie/ce4da3e03e655b5b88ed31b5cd7896cf62472.jpg@464w_644h_1e_1c",
    headers=header,
    timeout=10,
)
if response.ok:
    with open('test.jpg', 'wb') as f:
        f.write(response.content)
else:
    # Do not store an HTTP error body under a .jpg file name; report it and
    # let the rest of the script continue.
    print(f"Image download failed with HTTP status {response.status_code}")
# Build the column store: one independent empty list per output column,
# in the order the columns should appear in the exported sheet.
movie_info = {
    column: []
    for column in ("电影名称", "电影类型", "国家", "时长", "上映时间", "分数")
}
# response = requests.get("https://ssr1.scrape.center/",headers=header)
# Walk the paginated movie listing and append one entry per movie to each
# column list in movie_info.
# NOTE(review): assumes the listing pages are numbered 1 through 10, so the
# loop starts at 1 instead of requesting /page/0 - confirm against the site.
for page in range(1, 11):
    response = requests.get(
        f"https://ssr1.scrape.center/page/{page}",
        headers=header,
        timeout=10,  # avoid hanging forever on a stalled connection
    )
    if not response.ok:
        # Skip pages that did not load rather than parsing an error body.
        print(f"Skipping page {page}: HTTP status {response.status_code}")
        continue

    soup = BeautifulSoup(response.text, 'html.parser')
    cards = soup.find_all(name='div', class_='p-h el-col el-col-24 el-col-xs-9 el-col-sm-13 el-col-md-16')
    # Scores are looked up on the whole page, not inside each card, so they
    # are fetched once per page and matched to the cards by position.
    scores = soup.find_all(name="p", class_="score m-t-md m-b-n-sm")

    for card, score in zip(cards, scores):
        movie_info['电影名称'].append(card.h2.string)

        # Each <button> holds one genre label in its <span>; join them with
        # commas so the value carries no trailing separator.
        genres = [btn.span.string for btn in card.find_all(name="button")]
        movie_info['电影类型'].append(','.join(genres))

        info_rows = card.find_all(name="div", class_="m-v-sm info")
        # First info row: span 0 is stored as the country and span 2 as the
        # duration (span 1 is presumably the ' / ' separator).
        region_spans = info_rows[0].find_all(name="span")
        movie_info['国家'].append(region_spans[0].string)
        movie_info['时长'].append(region_spans[2].string)

        # Second info row: release date, which may have no <span> at all.
        release_spans = info_rows[1].find_all(name="span")
        movie_info['上映时间'].append(release_spans[0].string if release_spans else '')

        movie_info['分数'].append(score.string.strip())

# Dump the collected columns to stdout for a quick visual check.
print(movie_info)
# Wrap the column dict in a DataFrame and export it as an Excel workbook,
# leaving out the row index column.
data = pd.DataFrame(data=movie_info)
data.to_excel(excel_writer="./movieinfo.xlsx", index=False)






